#################################################################
#####                                                       #####
#####   Input: Debates data                                 #####
#####   Output: Creation of all speech-level covariates     #####
#####                                                       #####
#################################################################

# Load libraries

library(quanteda) # v3.3.1
library(data.table) # v1.14.8
library(plyr) # v1.8.9

# Load data

# Restores the objects stored in the .Rdata file into the workspace; the rest
# of this script works on the `debates` table it provides.
load(file = "data/debates.Rdata")

## Drop pre-Blair
# Keep every speech that does not belong to the 1992-1997 term.
outside_major_term <- debates$parliamentary_term != "1992-1997"
debates <- debates[outside_major_term]

# Add debate meta-data

# gov_mp: TRUE when the speaker's party was in government during the
# parliamentary term of the speech, FALSE otherwise.
#   1997-2010  Labour
#   2010-2015  Conservative / Liberal Democrat coalition
#   2015-2019  Conservative
labour_gov_terms <- c("1997-2001", "2001-2005", "2005-2010")
coalition_gov_terms <- c("2010-2015")
conservative_gov_terms <- c("2015-2017", "2017-2019")

debates$gov_mp <- FALSE
debates$gov_mp[debates$party == "Labour" &
                 debates$parliamentary_term %in% labour_gov_terms] <- TRUE
# The party column spells the junior coalition partner "Liberal Democrat" (see
# the party_short recoding further down); the "LibDem" label used here before
# never matched it, so coalition Lib Dems were left as non-government MPs.
# "LibDem" is kept in the set in case some rows do carry that label.
debates$gov_mp[debates$party %in% c("Conservative", "Liberal Democrat", "LibDem") &
                 debates$parliamentary_term %in% coalition_gov_terms] <- TRUE
debates$gov_mp[debates$party %in% c("Conservative") &
                 debates$parliamentary_term %in% conservative_gov_terms] <- TRUE

# Party edits

# Collapse the detailed party labels into five groups. Any party label that is
# not listed below keeps NA in party_short.
party_short_levels <- c("Conservative", "Labour", "Lib Dem", "Green", "Other party")

# Labels that are folded into the residual "Other party" group
other_party_labels <- c(
  "Change UK - The Independent Group",
  "Alliance",
  "Conservative Independent",
  "Other",
  "Scottish National Party",
  "UK Independence Party",
  "United Kingdom Unionist",
  "Democratic Unionist Party",
  "Independent",
  "Independent Conservative",
  "Independent Labour",
  "Independent Unionist",
  "Plaid Cymru",
  "Respect",
  "Social Democratic & Labour Party",
  "Speaker",
  "Ulster Unionist Party",
  "The Independent Group for Change"
)

debates$party_short <- factor(NA, levels = party_short_levels)
debates$party_short[debates$party %in% "Conservative"] <- "Conservative"
debates$party_short[debates$party %in% "Labour"] <- "Labour"
debates$party_short[debates$party %in% "Liberal Democrat"] <- "Lib Dem"
debates$party_short[debates$party %in% "Green Party"] <- "Green"
debates$party_short[debates$party %in% other_party_labels] <- "Other party"

# Tenure covariates; day counts are converted with a 365-day year.
debates$years_in_parliament <- debates$days_in_house / 365

# TRUE when days_in_house is at most 365, i.e. the MP is in their first year
debates$joiners <- debates$days_in_house <= 365

# Days from hdate (presumably the speech date - confirm) to the date the
# speaker left the House. `units` is now spelled out in full; the previous
# `unit =` only worked through partial argument matching.
debates$diff_in_days <- difftime(debates$left_house, debates$hdate, units = "days")
# TRUE when the speaker left the House no more than 365 days after hdate
debates$leavers <- debates$diff_in_days <= 365

## Education & occupation

# Person-level education and occupation covariates, keyed by person_id
education_occupation <- read.csv(file = "data/mp_data/final_occupation_education_v2.csv")

# 3 MPs missing, imputed as graduates
degree_unknown <- is.na(education_occupation$has_degree)
education_occupation$has_degree[degree_unknown] <- TRUE

# Attach every covariate except the `name` column to the speeches.
# NOTE(review): all = TRUE makes this a full outer join, so a person_id that
# appears only in the covariate file adds a row with no speech data - confirm
# that this is intended rather than a left join on `debates`.
covariate_columns <- names(education_occupation) != "name"
debates <- merge(
  x = debates,
  y = education_occupation[, covariate_columns],
  by = "person_id",
  all = TRUE
)

## Debate types
# Every flag in this section is derived from the `parent` heading of a speech.
# The grepl() based flags are case-insensitive substring (regex) matches.

### QT
debates$question_time <-
  grepl("Oral answers to question", debates$parent, ignore.case = TRUE)

### PMQs
# Exact match on the heading (note the double space after the colon). %in% is
# used instead of == so that a missing heading gives FALSE, as it does for the
# grepl() based flags, rather than NA.
debates$pm_question_time <-
  debates$parent %in% "Oral Answers to Questions:  Prime Minister"

### Opposition Day & Backbench Business
opp_bbb <-
  c(
    "Opposition Day",
    "Opposition Day:",
    "Backbench Business",
    "Backbench Business:",
    "Adjournment",
    "Private Members' Bills",
    "Remaining Private Members' Bills"
  )
debates$backbench_opposition_day <-
  grepl(paste(opp_bbb, collapse = "|"), debates$parent, ignore.case = TRUE)

### Procedural stuff
procedure <-
  c(
    "Orders of the Day",
    "Orders of the Day:",
    "Business of the House",
    "House Procedures",
    "Point of Order",
    "Business without Debate",
    "Speaker's Statement",
    "New Member",
    "Privilege",
    "Estimates",
    "Estimates Day"
  )
debates$procedure <-
  grepl(paste(procedure, collapse = "|"), debates$parent, ignore.case = TRUE)

### Legislation
debates$legislation <- grepl("Bill", debates$parent, ignore.case = TRUE)

### Petitions
# The petitions flag is stored as a column only; it has no debate_type category.
petitions <- c("Petitions", "Petitions:")
debates$petitions <-
  grepl(paste(petitions, collapse = "|"), debates$parent, ignore.case = TRUE)

### Single categorical debate type
# Later assignments overwrite earlier ones, so when several flags are TRUE the
# precedence is Procedure > Opposition/Backbench > Legislation > PMQs >
# Questions > Other. The flags are logical and are used directly as row
# selectors (they were previously compared with the string "TRUE").
debates$debate_type <- "Other"
debates$debate_type[debates$question_time] <- "Questions"
debates$debate_type[debates$pm_question_time] <- "PMQs"
debates$debate_type[debates$legislation] <- "Legislation"
debates$debate_type[debates$backbench_opposition_day] <- "Opposition/Backbench"
debates$debate_type[debates$procedure] <- "Procedure"

debates$debate_type <-
  factor(
    debates$debate_type,
    levels = c(
      "Other",
      "Questions",
      "PMQs",
      "Procedure",
      "Opposition/Backbench",
      "Legislation"
    )
  )

## Proportion of women
# Share of rows within each section_id whose gender is "Female"
debates[
  ,
  prop_women := sum(gender == "Female") / .N,
  by = "section_id"
]

## Add woman leader variable
# TRUE for every row of a section in which at least one row with a cabinet or
# shadow-cabinet attendee has gender "Female"
debates[
  ,
  female_leader_present := is.element(
    "Female",
    gender[attends_cabinet | attends_shadow_cabinet]
  ),
  by = "section_id"
]

## Ministers

# Position holders who do not attend (shadow) cabinet
debates$is_gov_minister <-
  !debates$attends_cabinet & debates$holds_government_position
debates$is_opp_minister <-
  !debates$attends_shadow_cabinet & debates$holds_opposition_position

## Add cohort
# cohort = the parliamentary term into which an MP's house_start date falls.

# One row per person and distinct house_start value.
# NOTE(review): if a person_id carries more than one house_start value, this
# table has several rows for that person and the merge below duplicates their
# speeches - confirm that house_start is constant within person_id.
cohort <- debates[, list(hdate = unique(house_start)), by = person_id]

# General-election dates that open each term, oldest first. A start date on or
# after a boundary and before the next one belongs to the term opened by that
# boundary; everything before the first boundary is labelled "1974-1979".
cohort_starts <- as.Date(c(
  "1979-05-03", "1983-06-09", "1987-06-11", "1992-04-09", "1997-05-01",
  "2001-06-07", "2005-05-05", "2010-05-06", "2015-05-07", "2017-06-08"
))
cohort_levels <- c(
  "1974-1979",
  "1979-1983",
  "1983-1987",
  "1987-1992",
  "1992-1997",
  "1997-2001",
  "2001-2005",
  "2005-2010",
  "2010-2015",
  "2015-2017",
  "2017-2019"
)

# findInterval() gives 0 before the first boundary and k from the k-th boundary
# onwards, hence the + 1 to index cohort_levels.
# Two fixes relative to the previous chain of assignments:
#  * pre-1979 entrants used to be written to a stray `parliamentary_term`
#    column and therefore kept the default "2017-2019"; they now get
#    "1974-1979", which is also a new first level of the factor;
#  * a missing start date now gives NA instead of the default "2017-2019".
cohort_index <- findInterval(
  as.numeric(as.Date(cohort$hdate)),
  as.numeric(cohort_starts)
) + 1L
cohort$cohort <- factor(cohort_levels[cohort_index], levels = cohort_levels)

# Attach the cohort to every speech; columns are selected by name, not position
debates <-
  merge(debates,
        cohort[, c("person_id", "cohort")],
        by = "person_id",
        all = TRUE)

## Government and opposition roles
# Rank flags based on the first listed government / opposition position only.
# Each flag is a case-insensitive substring match on the title.
sec_state <- "Secretary of State"
min_state <- "Minister of State"
shadow_sec_state <- "Shadow Secretary of State"
shadow_min_state <- "Shadow Minister"

first_gov_position <- debates$government_ministerial_position_1
first_opp_position <- debates$opposition_ministerial_position_1

debates$sec_state <- grepl(sec_state, first_gov_position, ignore.case = TRUE)
debates$min_state <- grepl(min_state, first_gov_position, ignore.case = TRUE)
debates$shadow_sec_state <-
  grepl(shadow_sec_state, first_opp_position, ignore.case = TRUE)
debates$shadow_min_state <-
  grepl(shadow_min_state, first_opp_position, ignore.case = TRUE)

### Make a vector of gov. department names
# For each policy area, the government titles that count towards it. Except
# for `children`, the titles are combined into one case-insensitive regular
# expression, so a title also matches any longer title that contains it (e.g.
# "Secretary of State for Environment" also matches "... Environment, Food and
# Rural Affairs"). The order of this list fixes the order in which columns are
# added to `debates`.
gov_departments <- list(
  defence = c(
    "Secretary of State for Defence",
    "Foreign Secretary and First Secretary of State"
  ),
  economy = c(
    "Chancellor of the Exchequer and First Secretary of State",
    "Chancellor of the Exchequer",
    "Secretary of State for Business, Enterprise and Regulatory Reform"
  ),
  agriculture = c(
    "Secretary of State for Environment, Food and Rural Affairs",
    "Secretary of State for the Environment, Transport and the Regions",
    "Secretary of State for Agriculture, Fisheries and Food"
  ),
  health = c(
    "Secretary of State for Health and Social Care",
    "Secretary of State for Health"
  ),
  children = c("Secretary of State for Children, Schools and Families"),
  education = c(
    "Secretary of State for Education",
    "Secretary of State for Education and Employment",
    "Secretary of State for Education and Skills",
    "Secretary of State for Innovation, Universities and Skills",
    "Secretary of State for Education and Science",
    "Secretary of State for Children, Schools and Families"
  ),
  social_welfare = c(
    "Secretary of State for Work and Pensions",
    "Secretary of State for Social Security",
    "Secretary of State for Social Security and Minister for Women"
  ),
  trade = c(
    "Secretary of State for Trade",
    "Secretary of State for Trade and Industry",
    "Secretary of State for International Trade and President of the Board of Trade",
    "Secretary of State for Trade and Industry and Minister for Women and e-Minister in Cabinet"
  ),
  environment = c("Secretary of State for Environment"),
  crime = c(
    "Lord Chancellor and Secretary of State for Justice",
    "Home Secretary"
  ),
  transport = c(
    "Secretary of State for Transport, Local Government and Regions",
    "Secretary of State for Transport"
  ),
  women = c(
    "Minister for Women and Equalities",
    "Secretary of State for Social Security and Minister for Women"
  )
)

### Create dummy variables for positions
# For every policy area this adds
#   <area>_1 ... <area>_3   match against government position 1, 2 and 3
#   <area>_secretary_gov    TRUE if any of the three positions matches
# `children` is an exact title match and only gets the combined column.
# set() is data.table's assign-by-reference; it adds or overwrites one column.
gov_position_cols <- paste0("government_ministerial_position_", 1:3)

for (dept in names(gov_departments)) {
  # The agriculture column keeps its historical (misspelt) name so that code
  # relying on `agriculure_secretary_gov` continues to work.
  dept_flag_col <- if (dept == "agriculture") {
    "agriculure_secretary_gov"
  } else {
    paste0(dept, "_secretary_gov")
  }

  if (dept == "children") {
    # %in% instead of ==: a missing position now yields FALSE, like the
    # grepl() based flags of the other areas, rather than NA.
    dept_hits <- lapply(gov_position_cols, function(position_col) {
      debates[[position_col]] %in% gov_departments[[dept]]
    })
  } else {
    dept_regex <- paste(gov_departments[[dept]], collapse = "|")
    dept_hits <- lapply(gov_position_cols, function(position_col) {
      grepl(dept_regex, debates[[position_col]], ignore.case = TRUE)
    })
    for (slot in seq_along(dept_hits)) {
      set(debates, j = paste0(dept, "_", slot), value = dept_hits[[slot]])
    }
  }

  set(debates, j = dept_flag_col, value = Reduce(`|`, dept_hits))
}

## Shadow sec of state
# For each policy area, the opposition titles that count towards it. The
# titles are matched literally (see the escaping below), case-insensitively
# and as substrings of the opposition position columns. The order of this list
# fixes the order in which columns are added to `debates`.
opp_departments <- list(
  defence = c("Shadow Secretary of State for Defence"),
  economy = c(
    "Shadow Chancellor of the Exchequer",
    "Shadow Lord Chancellor and Shadow Secretary of State for Justice (also Shadow Minister for London)",
    "Shadow Secretary of State (Cabinet Office) and Shadow Chancellor of the Duchy of Lancaster",
    "Shadow Chancellor of the Duchy of Lancaster",
    "Shadow Secretary of State for Industry",
    "Shadow Lord Chancellor and Shadow Secretary of State for Justice",
    "Shadow Secretary of State (Cabinet Office) and Shadow Chancellor of the Duchy of Lancaster"
  ),
  agriculture = c(
    "Shadow Secretary of State for Environment, Food and Rural Affairs"
  ),
  health = c("Shadow Secretary of State for Health"),
  children = c(
    "Shadow Secretary of State for the Family",
    "Shadow Secretary of State for the Family, Culture, Media and Sport",
    "Shadow Secretary of State (Children, Schools and Families)"
  ),
  education = c(
    "Shadow Secretary of State for Education",
    "Shadow Secretary of State (Education and Employment)",
    "Shadow Secretary of State (Education and Election Co-ordinator)",
    "Shadow Secretary of State (Children, Schools and Families)",
    "Shadow Secretary of State (Innovation, Universities and Skills)"
  ),
  social_welfare = c(
    "Shadow Secretary of State for Work and Pensions",
    "Secretary of State for Social Security"
  ),
  trade = c(
    "Shadow Secretary of State (Trade and Industry)",
    "Shadow Secretary of State for Trade and Industry",
    "Shadow Secretary of State for International Trade"
  ),
  environment = c(
    "Shadow Secretary of State for Energy and Climate Change",
    "Shadow Secretary of State (Environment)",
    "Shadow Secretary of State for Environment and Transport",
    "Shadow Secretary of State for Environment, Transport and the Regions",
    "Shadow Secretary of State for Environment"
  ),
  crime = c(
    "Shadow Lord Chancellor and Shadow Secretary of State for Justice (also Shadow Minister for London)",
    "Shadow Secretary of State (Home Office)",
    "Shadow Secretary of State (Justice)",
    "Shadow Lord Chancellor and Shadow Secretary of State for Justice"
  ),
  transport = c(
    "Shadow Secretary of State for Transport, Local Government and the Regions",
    "Shadow Secretary of State for Transport",
    "Shadow Secretary of State for Environment and Transport",
    "Shadow Secretary of State for Environment, Transport and the Regions"
  ),
  women = c(
    "Shadow Secretary of State for Women and Equalities",
    "Shadow Minister (Women and Equality)",
    "Shadow Minister (Women)",
    "Shadow Foreign Secretary and Minister for Women and Equalities",
    "Shadow Home Secretary and Shadow Minister for Women and Equalities",
    "Junior Shadow Minister for Women and Equalities",
    "Shadow Minister (Equalities Office) (Women and Equalities)",
    "Shadow Spokesperson (Women)"
  )
)

### Create dummy variables for positions
# For every policy area this adds or overwrites
#   <area>_1 ... <area>_3   match against opposition position 1, 2 and 3
#   <area>_secretary_opp    TRUE if any of the three positions matches
# set() is data.table's assign-by-reference; it adds or overwrites one column.
opp_position_cols <- paste0("opposition_ministerial_position_", 1:3)

# Many shadow titles contain parentheses. Used as a raw regular expression,
# "(Home Office)" is a group that matches "Home Office" WITHOUT the
# parentheses, so such titles never matched the data. Every regex
# metacharacter is therefore backslash-escaped before the titles are joined
# with "|".
regex_metachars <- "([.\\\\|()\\[\\]{}^$*+?])"

for (dept in names(opp_departments)) {
  # The agriculture column keeps its historical (misspelt) name so that code
  # relying on `agriculure_secretary_opp` continues to work.
  dept_flag_col <- if (dept == "agriculture") {
    "agriculure_secretary_opp"
  } else {
    paste0(dept, "_secretary_opp")
  }

  dept_titles <- gsub(regex_metachars, "\\\\\\1", opp_departments[[dept]],
                      perl = TRUE)
  dept_regex <- paste(dept_titles, collapse = "|")

  dept_hits <- lapply(opp_position_cols, function(position_col) {
    grepl(dept_regex, debates[[position_col]], ignore.case = TRUE)
  })
  for (slot in seq_along(dept_hits)) {
    set(debates, j = paste0(dept, "_", slot), value = dept_hits[[slot]])
  }
  set(debates, j = dept_flag_col, value = Reduce(`|`, dept_hits))
}

## Committee types
### Create vectors of committee names (present and historic)
defence <-
  c("Foreign Affairs Committee",
    "Defence Committee",
    "Armed Forces Bill Committee")
economy <-
  c(
    "Treasury & Civil Service",
    "Treasury Committee",
    "Treasury & Civil Service Sub-Committee",
    "Treasury Sub-committee",
    "Finance and Services Committee",
    "Finance Committee (Commons)"
  )
agriculture <-
  c("Agriculture", "Environment, Food and Rural Affairs Committee")
health <-
  c("Health and Social Care Committee", "Health Select Committee")
children <- c("Children, Schools and Families")
education <-
  c(
    "Education Committee",
    "Innovation, Universities, Science and Skills Committee",
    "Business and Enterprise Committee",
    "Children, Schools and Families",
    "Education & Skills",
    "Education & Employment",
    "Education"
  )
social_welfare <-
  c("Work and Pensions Committee", "Social Security")
trade <- c("International Trade Committee", "Trade & Industry")
environment <-
  c(
    "Environment, Food and Rural Affairs Committee",
    "Energy and Climate Change Committee",
    "Environmental Audit Committee",
    "Environment, Transport & Regional Affairs",
    "Environment",
    "Environment Sub-committee"
  )
crime <-
  c("Justice Committee",
    "Home Affairs Committee",
    "Home Affairs Sub-Committee")
transport <-
  c(
    "Transport, Local Government & The Regions",
    "Environment, Transport & Regional Affairs",
    "Transport Committee",
    "Transport Sub-committee"
  )
women <-
  c(
    "Commons Reference Group on Representation and Inclusion",
    "Women and Equalities Committee"
  )

### Create dummy variables for committee membership
debates$defence_1 <-
  grepl(paste(defence, collapse = "|"),
        debates$committee_name_1,
        ignore.case = TRUE)
debates$defence_2 <-
  grepl(paste(defence, collapse = "|"),
        debates$committee_name_2,
        ignore.case = TRUE)
debates$defence_3 <-
  grepl(paste(defence, collapse = "|"),
        debates$committee_name_3,
        ignore.case = TRUE)
debates$defence_4 <-
  grepl(paste(defence, collapse = "|"),
        debates$committee_name_4,
        ignore.case = TRUE)
debates$defence_5 <-
  grepl(paste(defence, collapse = "|"),
        debates$committee_name_5,
        ignore.case = TRUE)
debates$defence_comm <-
  debates$defence_1 == "TRUE" |
  debates$defence_2 == "TRUE" |
  debates$defence_3 == "TRUE" |
  debates$defence_4 == "TRUE" |
  debates$defence_5 == "TRUE"

debates$economy_1 <-
  grepl(paste(economy, collapse = "|"),
        debates$committee_name_1,
        ignore.case = TRUE)
debates$economy_2 <-
  grepl(paste(economy, collapse = "|"),
        debates$committee_name_2,
        ignore.case = TRUE)
debates$economy_3 <-
  grepl(paste(economy, collapse = "|"),
        debates$committee_name_3,
        ignore.case = TRUE)
debates$economy_4 <-
  grepl(paste(economy, collapse = "|"),
        debates$committee_name_4,
        ignore.case = TRUE)
debates$economy_5 <-
  grepl(paste(economy, collapse = "|"),
        debates$committee_name_5,
        ignore.case = TRUE)
debates$economy_comm <-
  debates$economy_1 == "TRUE" |
  debates$economy_2 == "TRUE" |
  debates$economy_3 == "TRUE" |
  debates$economy_4 == "TRUE" | 
  debates$economy_5 == "TRUE"

debates$agriculture_1 <-
  grepl(paste(agriculture, collapse = "|"),
        debates$committee_name_1,
        ignore.case = TRUE)
debates$agriculture_2 <-
  grepl(paste(agriculture, collapse = "|"),
        debates$committee_name_2,
        ignore.case = TRUE)
debates$agriculture_3 <-
  grepl(paste(agriculture, collapse = "|"),
        debates$committee_name_3,
        ignore.case = TRUE)
debates$agriculture_4 <-
  grepl(paste(agriculture, collapse = "|"),
        debates$committee_name_4,
        ignore.case = TRUE)
debates$agriculture_5 <-
  grepl(paste(agriculture, collapse = "|"),
        debates$committee_name_5,
        ignore.case = TRUE)
debates$agriculure_comm <-
  debates$agriculture_1 == "TRUE" |
  debates$agriculture_2 == "TRUE" |
  debates$agriculture_3 == "TRUE" |
  debates$agriculture_4 == "TRUE" | 
  debates$agriculture_5 == "TRUE"

debates$health_1 <-
  grepl(paste(health, collapse = "|"),
        debates$committee_name_1,
        ignore.case = TRUE)
debates$health_2 <-
  grepl(paste(health, collapse = "|"),
        debates$committee_name_2,
        ignore.case = TRUE)
debates$health_3 <-
  grepl(paste(health, collapse = "|"),
        debates$committee_name_3,
        ignore.case = TRUE)
debates$health_4 <-
  grepl(paste(health, collapse = "|"),
        debates$committee_name_4,
        ignore.case = TRUE)
debates$health_5 <-
  grepl(paste(health, collapse = "|"),
        debates$committee_name_5,
        ignore.case = TRUE)
debates$health_comm <-
  debates$health_1 == "TRUE" |
  debates$health_2 == "TRUE" |
  debates$health_3 == "TRUE" |
  debates$health_4 == "TRUE" | 
  debates$health_5 == "TRUE"

debates$children_comm <-
  debates$committee_name_1 == "Children, Schools and Families" |
  debates$committee_name_2 == "Children, Schools and Families" |
  debates$committee_name_3 == "Children, Schools and Families" |
  debates$committee_name_4 == "Children, Schools and Families" |
  debates$committee_name_5 == "Children, Schools and Families"

## Education committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `education` with "|"
## (`education` is defined earlier in the file).
debates$education_1 <- grepl(
  pattern = paste(education, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$education_2 <- grepl(
  pattern = paste(education, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$education_3 <- grepl(
  pattern = paste(education, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$education_4 <- grepl(
  pattern = paste(education, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$education_5 <- grepl(
  pattern = paste(education, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$education_comm <- Reduce(
  `|`,
  list(
    debates$education_1,
    debates$education_2,
    debates$education_3,
    debates$education_4,
    debates$education_5
  )
)

## Social welfare committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `social_welfare` with
## "|" (`social_welfare` is defined earlier in the file).
debates$social_welfare_1 <- grepl(
  pattern = paste(social_welfare, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$social_welfare_2 <- grepl(
  pattern = paste(social_welfare, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$social_welfare_3 <- grepl(
  pattern = paste(social_welfare, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$social_welfare_4 <- grepl(
  pattern = paste(social_welfare, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$social_welfare_5 <- grepl(
  pattern = paste(social_welfare, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$social_welfare_comm <- Reduce(
  `|`,
  list(
    debates$social_welfare_1,
    debates$social_welfare_2,
    debates$social_welfare_3,
    debates$social_welfare_4,
    debates$social_welfare_5
  )
)

## Trade committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `trade` with "|"
## (`trade` is defined earlier in the file).
debates$trade_1 <- grepl(
  pattern = paste(trade, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$trade_2 <- grepl(
  pattern = paste(trade, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$trade_3 <- grepl(
  pattern = paste(trade, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$trade_4 <- grepl(
  pattern = paste(trade, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$trade_5 <- grepl(
  pattern = paste(trade, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$trade_comm <- Reduce(
  `|`,
  list(
    debates$trade_1,
    debates$trade_2,
    debates$trade_3,
    debates$trade_4,
    debates$trade_5
  )
)

## Environment committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `environment` with "|"
## (`environment` here is the object defined earlier in the file).
debates$environment_1 <- grepl(
  pattern = paste(environment, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$environment_2 <- grepl(
  pattern = paste(environment, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$environment_3 <- grepl(
  pattern = paste(environment, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$environment_4 <- grepl(
  pattern = paste(environment, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$environment_5 <- grepl(
  pattern = paste(environment, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$environment_comm <- Reduce(
  `|`,
  list(
    debates$environment_1,
    debates$environment_2,
    debates$environment_3,
    debates$environment_4,
    debates$environment_5
  )
)
## Crime committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `crime` with "|"
## (`crime` is defined earlier in the file).
debates$crime_1 <- grepl(
  pattern = paste(crime, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$crime_2 <- grepl(
  pattern = paste(crime, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$crime_3 <- grepl(
  pattern = paste(crime, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$crime_4 <- grepl(
  pattern = paste(crime, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$crime_5 <- grepl(
  pattern = paste(crime, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$crime_comm <- Reduce(
  `|`,
  list(
    debates$crime_1,
    debates$crime_2,
    debates$crime_3,
    debates$crime_4,
    debates$crime_5
  )
)

## Transport committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `transport` with "|"
## (`transport` is defined earlier in the file).
debates$transport_1 <- grepl(
  pattern = paste(transport, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$transport_2 <- grepl(
  pattern = paste(transport, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$transport_3 <- grepl(
  pattern = paste(transport, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$transport_4 <- grepl(
  pattern = paste(transport, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$transport_5 <- grepl(
  pattern = paste(transport, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$transport_comm <- Reduce(
  `|`,
  list(
    debates$transport_1,
    debates$transport_2,
    debates$transport_3,
    debates$transport_4,
    debates$transport_5
  )
)
## Women's committees.
## Each of the five committee slots is tested against one case-insensitive
## regular expression built by joining the entries of `women` with "|"
## (`women` is defined earlier in the file).
debates$women_1 <- grepl(
  pattern = paste(women, collapse = "|"),
  x = debates$committee_name_1,
  ignore.case = TRUE
)
debates$women_2 <- grepl(
  pattern = paste(women, collapse = "|"),
  x = debates$committee_name_2,
  ignore.case = TRUE
)
debates$women_3 <- grepl(
  pattern = paste(women, collapse = "|"),
  x = debates$committee_name_3,
  ignore.case = TRUE
)
debates$women_4 <- grepl(
  pattern = paste(women, collapse = "|"),
  x = debates$committee_name_4,
  ignore.case = TRUE
)
debates$women_5 <- grepl(
  pattern = paste(women, collapse = "|"),
  x = debates$committee_name_5,
  ignore.case = TRUE
)
## TRUE when at least one slot matched. grepl() returns logical vectors, so
## the slot flags are OR-ed together directly.
debates$women_comm <- Reduce(
  `|`,
  list(
    debates$women_1,
    debates$women_2,
    debates$women_3,
    debates$women_4,
    debates$women_5
  )
)

## Remove unneeded variables
## Drops the per-slot helper columns (<topic>_1 ... <topic>_5) now that the
## combined <topic>_comm indicators exist; assigning NULL deletes a column.

## Defence helper flags
debates$defence_1 <- NULL
debates$defence_2 <- NULL
debates$defence_3 <- NULL
debates$defence_4 <- NULL
debates$defence_5 <- NULL
## Economy helper flags
debates$economy_1 <- NULL
debates$economy_2 <- NULL
debates$economy_3 <- NULL
debates$economy_4 <- NULL
debates$economy_5 <- NULL
## Agriculture helper flags
debates$agriculture_1 <- NULL
debates$agriculture_2 <- NULL
debates$agriculture_3 <- NULL
debates$agriculture_4 <- NULL
debates$agriculture_5 <- NULL
## Health helper flags
debates$health_1 <- NULL
debates$health_2 <- NULL
debates$health_3 <- NULL
debates$health_4 <- NULL
debates$health_5 <- NULL
## Education helper flags
debates$education_1 <- NULL
debates$education_2 <- NULL
debates$education_3 <- NULL
debates$education_4 <- NULL
debates$education_5 <- NULL
## Social welfare helper flags
debates$social_welfare_1 <- NULL
debates$social_welfare_2 <- NULL
debates$social_welfare_3 <- NULL
debates$social_welfare_4 <- NULL
debates$social_welfare_5 <- NULL
## Trade helper flags (trade_5 is removed further down, after children_3)
debates$trade_1 <- NULL
debates$trade_2 <- NULL
debates$trade_3 <- NULL
debates$trade_4 <- NULL
## Environment helper flags
debates$environment_1 <- NULL
debates$environment_2 <- NULL
debates$environment_3 <- NULL
debates$environment_4 <- NULL
debates$environment_5 <- NULL
## Crime helper flags
debates$crime_1 <- NULL
debates$crime_2 <- NULL
debates$crime_3 <- NULL
debates$crime_4 <- NULL
debates$crime_5 <- NULL
## Transport helper flags
debates$transport_1 <- NULL
debates$transport_2 <- NULL
debates$transport_3 <- NULL
debates$transport_4 <- NULL
debates$transport_5 <- NULL
## Women helper flags
debates$women_1 <- NULL
debates$women_2 <- NULL
debates$women_3 <- NULL
debates$women_4 <- NULL
debates$women_5 <- NULL
## NOTE(review): children_comm above is computed directly from the
## committee_name_* columns, so no children_* helper columns are created in
## this section. Confirm whether they exist upstream; if not, these three
## lines remove nothing (and may emit a warning, depending on the class of
## `debates`). Only slots 1-3 are listed here, not 4-5.
debates$children_1 <- NULL
debates$children_2 <- NULL
debates$children_3 <- NULL
## Last trade helper flag (listed out of sequence with trade_1..4 above)
debates$trade_5 <- NULL
## NOTE(review): `X` is presumably a leftover row-index column from an
## earlier import - TODO confirm.
debates$X <- NULL

# Save data
# Writes the augmented `debates` object back to the same .Rdata file that was
# loaded at the top of this script, replacing its previous contents.
save(list = "debates", file = "data/debates.Rdata")
